import json
from flask import Blueprint, request
import statsmodels.api as sm
from scipy.stats import f
import pandas as pd
import numpy as np
from factor_analyzer import FactorAnalyzer
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity, calculate_kmo

# Flask blueprint on which every multivariate-statistics route in this module is registered.
multivariateStatistics_api=Blueprint('multivariateStatistics_api',__name__)

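# Multivariate statistics
# ---------------------------------------------------------------------------
# Multi-factor analysis of variance (ANOVA) - check endpoint
#   :param data: dict holding several factors, each mapped to an array of values
#   :return: the test result together with the intermediate quantities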
def _one_way_anova(groups):
    """Compute a one-way ANOVA table, treating every dict entry as one group.

    :param groups: mapping of group name -> sequence of numeric observations;
        groups may have different sizes
    :return: dict with the keys ``SS_between``, ``SS_within``, ``df_between``,
        ``df_within``, ``F-value`` and ``P-value``
    :raises ValueError: if there are fewer than two groups, a group is empty or
        not one-dimensional, or the F statistic is undefined (no residual
        degrees of freedom or no within-group variation)
    """
    samples = [np.asarray(values, dtype=float) for values in groups.values()]
    if len(samples) < 2:
        raise ValueError("at least two groups are required")
    if any(sample.ndim != 1 or sample.size == 0 for sample in samples):
        raise ValueError("every group must be a non-empty one-dimensional array")

    n_groups = len(samples)
    # Total number of observations across ALL groups, not the size of one group.
    n_total = sum(sample.size for sample in samples)
    df_between = n_groups - 1
    df_within = n_total - n_groups
    if df_within <= 0:
        raise ValueError("not enough observations: need more values than groups")

    # The grand mean is loop-invariant; pooling the raw observations also keeps
    # it correct when the groups have unequal sizes.
    grand_mean = np.concatenate(samples).mean()
    ss_between = sum(s.size * (s.mean() - grand_mean) ** 2 for s in samples)
    ss_within = sum(((s - s.mean()) ** 2).sum() for s in samples)
    if ss_within == 0:
        raise ValueError("F statistic is undefined: no within-group variation")

    f_value = (ss_between / df_between) / (ss_within / df_within)
    # The survival function is more accurate than 1 - cdf for tiny p-values.
    p_value = f.sf(f_value, df_between, df_within)

    return {'SS_between': float(ss_between),
            'SS_within': float(ss_within),
            'df_between': df_between,
            'df_within': df_within,
            'F-value': float(f_value),
            'P-value': float(p_value)
            }


@multivariateStatistics_api.route("/anova/check",methods=["POST"])
def multiFactorAnovaTest():
    """Run a one-way ANOVA over the groups posted in the ``data`` form field.

    ``data`` must be a JSON object mapping each factor name to an array of
    numbers. Each entry is treated as one group of observations.

    :return: the ANOVA table as a dict (sums of squares, degrees of freedom,
        F value and p-value), or ``({"error": message}, 400)`` when the input
        is missing or unusable
    """
    raw_data = request.form.get("data")
    if raw_data is None:
        return {"error": "form field 'data' is required"}, 400

    try:
        groups = json.loads(raw_data)
        if not isinstance(groups, dict):
            raise ValueError("'data' must be a JSON object mapping factor names to arrays")
        return _one_way_anova(groups)
    except (TypeError, ValueError) as exc:
        # json.JSONDecodeError is a ValueError, so malformed JSON lands here too.
        return {"error": str(exc)}, 400
# Test data
# data = {
#         "factor1": [22, 28, 24, 25, 23, 29],
#         "factor2": [25, 30, 28, 27, 26, 31],
#         "factor3": [20, 23, 22, 21, 19, 24]
#     }



@multivariateStatistics_api.route("/mRegression/check",methods=["POST"])
def multiRegressionAnalysisTest():
    """Fit an ordinary-least-squares multiple regression with an intercept.

    Form fields:
        y: JSON array, the dependent variable (one value per observation).
        x: JSON array of arrays, one inner array per independent variable,
           each as long as ``y``. A single flat array is accepted as one
           independent variable.

    Returns:
        A dict with
        ``coefficients`` - fitted parameters, intercept first;
        ``equation``     - the fitted equation rendered with two decimals;
        ``significant``  - the p-values of the non-intercept coefficients
                           (raw p-values, not booleans);
        or ``({"error": message}, 400)`` when the input is missing or malformed.
    """
    raw_x = request.form.get("x")
    raw_y = request.form.get("y")
    if raw_x is None or raw_y is None:
        return {"error": "form fields 'x' and 'y' are required"}, 400

    try:
        y = np.asarray(json.loads(raw_y), dtype=float)
        x = np.asarray(json.loads(raw_x), dtype=float)
    except (TypeError, ValueError) as exc:
        # Covers malformed JSON, non-numeric entries and ragged arrays.
        return {"error": "invalid 'x' or 'y': {}".format(exc)}, 400

    if x.ndim == 1 and x.size:
        # A flat array is a single predictor; promote it to one row.
        x = x[np.newaxis, :]
    if y.ndim != 1 or x.ndim != 2 or x.shape[1] != y.shape[0] or y.size == 0:
        return {"error": "'x' must be a list of arrays, each as long as 'y'"}, 400

    # One column per independent variable.
    X = x.T
    # has_constant="add" forces an intercept column even when a predictor is
    # itself constant; the default ("skip") would silently omit it and shift
    # the meaning of index 0 in the parameters and p-values below.
    X = sm.add_constant(X, has_constant="add")

    model = sm.OLS(y, X).fit()
    coefs = model.params

    # Render "y = b0 + b1x1 + b2x2 + ..." with two decimals per coefficient.
    eqn = "y = " + "{:.2f}".format(coefs[0]) + "".join(
        " + {:.2f}x{}".format(coef, i) for i, coef in enumerate(coefs[1:], start=1)
    )

    # Drop the intercept's p-value; the rest align with x1, x2, ...
    p_values = model.pvalues[1:]

    result = {
        "coefficients": coefs.tolist(),
        "equation": eqn,
        "significant": p_values.tolist()
    }

    return result

# Test case
# x = [[1, 2, 3, 4, 5],[2, 4, 6, 8, 10],[3, 7, 9, 15, 20],[7, 9, 88, 99, 100],[7, 9, 88, 97, 100]]
# y = [1, 2, 3, 4, 5]


@multivariateStatistics_api.route("/mRegression/predict",methods=["POST"])
def multiRegressionAnalysisPrediction():
    """Evaluate a posted equation for the given variable values.

    Form fields:
        equation: a Python expression such as ``x1**2 + x2**2 + a``.
        args:     optional JSON array; its items are bound to ``x1``, ``x2``,
                  ... in order.
        kwargs:   optional JSON object of additional named values; entries
                  override positional names on collision.

    Values are bound as variables rather than spliced into the equation text,
    so ``x1`` never rewrites part of ``x10`` and a negative value keeps its
    sign under ``**``.

    Returns:
        ``{"result": value}``, or ``({"error": message}, 400)`` when a field
        is missing or malformed or the expression cannot be evaluated.
    """
    equation = request.form.get("equation")
    if not equation:
        return {"error": "form field 'equation' is required"}, 400

    try:
        args = json.loads(request.form.get("args") or "[]")
        kwargs = json.loads(request.form.get("kwargs") or "{}")
    except ValueError as exc:
        return {"error": "'args'/'kwargs' are not valid JSON: {}".format(exc)}, 400
    if not isinstance(args, list) or not isinstance(kwargs, dict):
        return {"error": "'args' must be a JSON array and 'kwargs' a JSON object"}, 400

    # Positional values become x1, x2, ...; named values are merged on top.
    variable_values = {f'x{i}': arg for i, arg in enumerate(args, start=1)}
    variable_values.update(kwargs)

    # SECURITY: eval() executes arbitrary Python taken from the request, with
    # this module's globals and the builtins in reach. Anyone who can call this
    # endpoint can run code on the server. Replace with a whitelisting
    # expression parser (e.g. an ast-based evaluator) before exposing this
    # route to untrusted clients.
    try:
        result = eval(equation, globals(), variable_values)
    except (SyntaxError, NameError, TypeError, ValueError, ArithmeticError) as exc:
        return {"error": "cannot evaluate equation: {}".format(exc)}, 400
    return {"result":result}
# Test data
# An equation with several independent variables and a constant term
# equation = "x1**2 + x2**2 + x3**2 + a"
# args=(1,2,3)
# kwargs={"a":10}



@multivariateStatistics_api.route("/factor/check",methods=["POST"])
def factorAnalysisTest():
    """Run a demo factor analysis on a freshly generated random data set.

    No request data is read: each call draws a new 100 x 10 table of uniform
    random numbers (columns ``var1`` .. ``var10``), fits a three-factor model
    with varimax rotation, and computes the KMO and Bartlett sphericity checks
    on the same table.

    :return: dict with the generated data (as a JSON string), the two values
        returned by ``calculate_kmo``, the Bartlett test result, and the
        fitted factor loadings
    """
    column_names = [f"var{idx}" for idx in range(1, 11)]
    frame = pd.DataFrame(np.random.rand(100, 10), columns=column_names)

    # Three factors, varimax rotation.
    analyzer = FactorAnalyzer(n_factors=3, rotation='varimax')
    analyzer.fit(frame)

    # Sampling-adequacy (KMO) and sphericity (Bartlett) checks on the same data.
    kmo_values, kmo_overall = calculate_kmo(frame)
    sphericity = calculate_bartlett_sphericity(frame)

    return {
        "data": frame.to_json(),
        "kmo_all": kmo_values.tolist(),
        "kmo_model": kmo_overall,
        "bartlett": sphericity,
        "fa.loadings_": analyzer.loadings_.tolist(),
    }